# STAT 202 Project
# Random Forest
# Author: Fatih Sunor
#####################################################

# RF

# Fit random forests of increasing size on the first `n_obs` rows of
# training.csv and plot the out-of-bag (OOB) misclassification error
# against the number of trees.
#
# Changes relative to the earlier version of this script:
#   * The workspace is no longer wiped with rm(list = ls()); a script should
#     not destroy objects belonging to the session that sources it.
#   * randomForest is loaded explicitly.
#   * Predictors are assembled with data.frame() instead of c(). c() on a
#     data.frame plus atomic vectors splits every vector into one list
#     element per value, so the derived feature and column 11 used to enter
#     the model as 200 constant columns rather than two real predictors.
#   * The error is measured on out-of-bag predictions. Predicting on the
#     same rows the forest was grown on gives a near-zero, uninformative
#     training error.

library(randomForest)

set.seed(202) # make the fitted forests (and therefore the plot) reproducible

n_obs <- 100L
train <- read.csv("training.csv", header = TRUE)

# Fail early instead of silently indexing NA rows / missing columns.
stopifnot(nrow(train) >= n_obs, ncol(train) >= 13L)
rows <- seq_len(n_obs)

# Predictors: columns 6 and 7 as-is, log(1 + sqrt(col9 * col10)), column 11.
feature <- data.frame(
  train[rows, 6:7],
  log_gm_9_10 = log1p(sqrt(train[rows, 9] * train[rows, 10])),
  train[rows, 11, drop = FALSE]
)

# Subset before factoring so levels absent from these rows are dropped;
# randomForest() stops on empty classes in the response.
relevance <- factor(train[[13]][rows])

temp <- data.frame(relevance, feature)

trees <- seq(500, 1000, by = 100)
error <- numeric(length(trees))

for (k in seq_along(trees)) {
  model <- randomForest(relevance ~ ., data = temp, ntree = trees[k])

  # predict() without newdata returns the out-of-bag class predictions.
  p <- predict(model)

  # na.rm guards the (extremely unlikely) case of a row that was never OOB.
  error[k] <- mean(p != relevance, na.rm = TRUE)
}

plot(
  trees, error,
  type = "l", lwd = 5,
  xlab = "Number of trees",
  ylab = "OOB Classification Error",
  main = "Random Forest Misclassification error at different tree levels"
)